Introduction
Basic data visualization of beatmap info provided by the osu! API (https://github.com/ppy/osu-api/wiki).
Only ranked, loved, and qualified maps are used. The graphs focus mainly on standard mode.
(A few maps are actually in the graveyard but are included in the API query anyway because of ranked/loved irregularities, e.g. https://osu.ppy.sh/b/766190&m=2, which has one loved CtB difficulty.)
library(ggplot2)
library(gridExtra)
library(plyr)
library(jsonlite)
library(varhandle)
library(chron)
library(magrittr)
# Cached chunk
# Parse maps.json, row-bind the parsed list elements into one object, and
# drop duplicate rows
beatmaps <- fromJSON("maps.json")
beatmaps <- do.call("rbind", beatmaps)
beatmaps <- unique(beatmaps)
# Chunk defaults: 10 x 6 figures, with messages and warnings hidden
knitr::opts_chunk$set(
  fig.width = 10,
  fig.height = 6,
  message = FALSE,
  warning = FALSE
)
# Convert strings of ints and floats to numeric datatypes.
# A column is flagged as integer when every value passes check.numeric() with
# only.integer = TRUE, and as numeric when every value passes the plain
# check.numeric() test but the column is not already flagged as integer.
beatmaps.isintcol <- vapply(
  beatmaps,
  function(col) all(check.numeric(col, only.integer = TRUE)),
  logical(1)
)
beatmaps.isnumcol <- vapply(
  beatmaps,
  function(col) all(check.numeric(col)),
  logical(1)
) & !beatmaps.isintcol
# Convert column by column with lapply() and list-style indexing. Unlike
# sapply() on beatmaps[, mask], this never collapses the selection into a
# matrix (many columns) or a bare vector (a single column).
beatmaps[beatmaps.isintcol] <- lapply(beatmaps[beatmaps.isintcol], as.integer)
beatmaps[beatmaps.isnumcol] <- lapply(beatmaps[beatmaps.isnumcol], as.numeric)
# Parse the MySQL-style datetime strings into POSIXct values. The zone
# "Etc/GMT+4" was picked because the new site appears to show UTC-4, but that
# may only be a local setting, so these timestamps are probably off.
# NOTE(review): confirm the API's timezone before relying on exact times.
beatmaps[c("approved_date", "last_update")] <- lapply(
  beatmaps[c("approved_date", "last_update")],
  as.POSIXct,
  tz = "Etc/GMT+4"
)
# Create labels and data frames for each gamemode.
# gamemodes holds the variable names of the per-mode data frames and
# gamemode.labels the matching display names, both in mode-code order.
gamemodes <- c("std", "taiko", "ctb", "mania")
gamemode.labels <- c("Standard", "Taiko", "CtB", "Mania")
# Pin the levels to the mode codes 0-3 so each label stays tied to its code
# even when a mode is absent from the data. Without explicit levels, factor()
# pairs the labels with the sorted unique values, which fails (or mislabels)
# as soon as one code is missing.
beatmaps$mode <- factor(
  beatmaps$mode,
  levels = seq_along(gamemode.labels) - 1L,
  labels = gamemode.labels
)
# Define std, taiko, ctb, and mania as the rows of beatmaps for each mode
for (i in seq_along(gamemodes)) {
  assign(gamemodes[i], beatmaps[beatmaps$mode == gamemode.labels[i], ])
}
# Shared scales, layers, and labels reused by the plots in this document.
# The axis limits will usually leave a few outlier maps out.

# Star rating on x: 0-10 with a tick every half star, 0.05-star bins
diff_x_scale <- scale_x_continuous(
  breaks = seq(0, 10, 0.5),
  limits = c(0, 10)
)
diff_hist <- geom_histogram(binwidth = 0.05)

# Length on x: 0-600 with a tick every 30, bins of width 1
length_x_scale <- scale_x_continuous(
  breaks = seq(0, 600, 30),
  limits = c(0, 600)
)
length_hist <- geom_histogram(binwidth = 1)

# y scales for approach rate and star rating
AR_y_scale <- scale_y_continuous(breaks = 0:10)
SR_y_scale <- scale_y_continuous(
  breaks = seq(0, 10, 1),
  limits = c(0, 10)
)

# Approval date on x: one tick per year, labelled with the year only
approved_x_scale <- scale_x_datetime(
  date_labels = "%Y",
  date_breaks = "1 year"
)

# Legend titles for plots that map mode to fill or color
legend_title_fill <- labs(fill = "Mode")
legend_title_color <- labs(color = "Mode")

# Center titles
theme_update(plot.title = element_text(hjust = 0.5))
Plots
# Stacked histogram of star rating, filled by mode
ggplot(beatmaps, aes(x = difficultyrating, fill = as.factor(mode))) +
  diff_hist +
  diff_x_scale +
  legend_title_fill +
  labs(title = "Total Star Rating (All Modes)")

# Frequency polygon of star rating, one line per mode
ggplot(beatmaps, aes(x = difficultyrating, color = as.factor(mode))) +
  geom_freqpoly(binwidth = 0.05) +
  diff_x_scale +
  legend_title_color +
  labs(title = "Star Rating (All Modes)")

# Star rating histograms, one facet per mode
ggplot(beatmaps, aes(x = difficultyrating)) +
  ggtitle("Star Rating Distributions (All Modes)") +
  facet_wrap("mode") +
  diff_hist +
  diff_x_scale

# Star rating histograms arranged in a grid, one panel per mode; because each
# panel is its own plot, each gets its own y scale
sr.hist.panel <- function(mode.df, panel.title) {
  ggplot(mode.df, aes(difficultyrating)) +
    diff_x_scale +
    diff_hist +
    ggtitle(panel.title)
}
diffplots0 <- sr.hist.panel(std, "Standard")
diffplots1 <- sr.hist.panel(taiko, "Taiko")
diffplots2 <- sr.hist.panel(ctb, "CtB")
diffplots3 <- sr.hist.panel(mania, "Mania")
grid.arrange(
  diffplots0, diffplots1, diffplots2, diffplots3,
  top = "Star Rating Distributions (All Modes)"
)

# Stacked histogram of total length, filled by mode
ggplot(beatmaps, aes(x = total_length, fill = as.factor(mode))) +
  length_hist +
  length_x_scale +
  legend_title_fill +
  labs(title = "Total Beatmap Length (All Modes)")

# Frequency polygon of total length, one line per mode
ggplot(beatmaps, aes(x = total_length, color = as.factor(mode))) +
  geom_freqpoly(binwidth = 1) +
  length_x_scale +
  legend_title_color +
  labs(title = "Beatmap Length (All Modes)")

# Total length histograms arranged in a grid, one panel per mode
length.hist.panel <- function(mode.df, panel.title) {
  ggplot(mode.df, aes(total_length)) +
    length_x_scale +
    length_hist +
    ggtitle(panel.title)
}
lengthplots0 <- length.hist.panel(std, "Standard")
lengthplots1 <- length.hist.panel(taiko, "Taiko")
lengthplots2 <- length.hist.panel(ctb, "CtB")
lengthplots3 <- length.hist.panel(mania, "Mania")
grid.arrange(
  lengthplots0, lengthplots1, lengthplots2, lengthplots3,
  top = "Beatmap Length Distributions (All Modes)"
)

# Frequency polygons of playcount, one line per mode, in bins of 5000 plays;
# the x axis is limited to 0-1,000,000 plays
ggplot(beatmaps, aes(x = playcount, color = as.factor(mode))) +
  geom_freqpoly(binwidth = 5000) +
  scale_x_continuous(limits = c(0, 1000000)) +
  legend_title_color +
  labs(title = "Playcount (All Modes)")

# Bucket every map by the calendar month of its approval date
beatmaps$month <- cut(beatmaps$approved_date, breaks = "month")
# Only put axis labels on the buckets that match a yearly cut of the same dates
year_x_scale <- scale_x_discrete(
  breaks = unique(cut(beatmaps$approved_date, breaks = "year"))
)
rotate_x_labels <- theme(axis.text.x = element_text(angle = 45, hjust = 1))
# Bar chart of maps approved per month, stacked by mode.
# month is a discrete factor, so the counts are drawn with geom_bar() rather
# than geom_histogram(stat = "count"); the legend title matches the other plots.
ggplot(beatmaps, aes(x = month, fill = mode)) +
  ggtitle("Date Approved (All Modes)") +
  legend_title_fill +
  geom_bar(width = 1) +
  year_x_scale + rotate_x_labels

# Frequency polygon of date approved (all modes), one line per mode
ggplot(beatmaps, aes(x = month, group = mode, color = mode)) +
  ggtitle("Date Approved (All Modes)") +
  legend_title_color +
  geom_freqpoly(stat = "count") +
  year_x_scale + rotate_x_labels

Tables
library(knitr)
# Render a table of the most frequent values in a vector.
#   arr: vector of values to tabulate
#   lab: header for the value column (the count column is headed "Freq")
#   n:   number of rows to keep; defaults to 20
# Returns the kable() table of the n most frequent values and their counts,
# sorted by descending frequency.
most.frequent.kable <- function(arr, lab, n = 20) {
  counts <- sort(table(arr), decreasing = TRUE)
  kable(head(counts, n), col.names = c(lab, "Freq"))
}
# Frequency tables for artists, titles, sources, and creators; artists first
most.frequent.kable(beatmaps$artist, "Artist")
| Hatsune Miku |
653 |
| ClariS |
421 |
| KOTOKO |
398 |
| fripSide |
362 |
| xi |
349 |
| senya |
339 |
| IOSYS |
331 |
| Various Artists |
329 |
| LiSA |
323 |
| yanaginagi |
311 |
| Camellia |
309 |
| ZUN |
272 |
| Duca |
257 |
| Rita |
255 |
| M2U |
245 |
| Chata |
226 |
| u’s |
225 |
| Hanatan |
224 |
| nano |
224 |
| Suzuki Konomi |
211 |
# Most frequent titles
most.frequent.kable(beatmaps$title, "Title")
| Piano 7K BMS Pack |
193 |
| Piano Beatmap Set |
114 |
| Harumachi Clover |
70 |
| Granat |
65 |
| Ai no Scenario |
62 |
| PEPPY FIX TAIKO STAR RATING PLEASE for a happier |
61 |
| Tokyo (Innovaderz Remix) |
61 |
| MIIRO |
58 |
| Hitorigoto -TV MIX- |
55 |
| Re:TrymenT |
54 |
| Haru Modoki |
52 |
| Natsukoi Hanabi |
52 |
| TSLove |
52 |
| Contrail Kiseki |
51 |
| Paradisus-Paradoxum |
51 |
| Untan Goose |
51 |
| Uso no Hibana |
50 |
| Six Trillion Years and Overnight Story |
49 |
| Gabriel Drop Kick |
43 |
| Oriental Blossom |
42 |
# Most frequent sources
most.frequent.kable(beatmaps$source, "Source")
|
24652 |
| Touhou |
2370 |
| BMS |
1557 |
| 東方Project |
754 |
| SOUND VOLTEX III GRAVITY WARS |
466 |
| DJMAX |
455 |
| beatmania IIDX |
398 |
| SOUND VOLTEX II -infinite infection- |
390 |
| osu! |
258 |
| Vocaloid |
242 |
| Taiko no Tatsujin |
222 |
| Deemo |
202 |
| jubeat |
187 |
| Nico Nico Douga |
178 |
| Love Live! School idol project |
174 |
| SOUND VOLTEX BOOTH |
169 |
| REFLEC BEAT groovin’!! |
166 |
| Cytus |
164 |
| K-ON!! |
153 |
| 艦隊これくしょん -艦これ- |
146 |
# Most frequent creators
most.frequent.kable(beatmaps$creator, "Creator")
| osuplayer111 |
572 |
| Sotarks |
571 |
| DJPop |
563 |
| tutuhaha |
387 |
| ztrot |
377 |
| Larto |
354 |
| Natsu |
345 |
| Monstrata |
331 |
| Ascendance |
311 |
| pishifat |
311 |
| Lasse |
310 |
| Gero |
304 |
| wcx19911123 |
293 |
| Milan- |
279 |
| ouranhshc |
262 |
| alacat |
248 |
| NatsumeRin |
247 |
| MoonFragrance |
234 |
| James |
232 |
| Fycho |
221 |
# Most favorited mapsets: keep the first row seen for each beatmapset_id, then
# show the 50 rows with the highest favourite_count
beatmaps %>%
  subset(!duplicated(beatmapset_id)) %>%
  arrange(desc(favourite_count)) %>%
  head(50) %>%
  .[c("creator", "artist", "title", "favourite_count")] %>%
  kable()
| W h i t e |
Kuba Oms |
My Love |
13163 |
| Fort |
Panda Eyes & Teminite |
Highscore |
10292 |
| jonathanlfj |
cYsmix feat. Emmy |
Tear Rain |
9763 |
| Charles445 |
Rostik |
Liquid (Paul Rosenthal Remix) |
8282 |
| VINXIS |
Reol |
No title |
7604 |
| Ekoro |
UNDEAD CORPORATION |
Everything will freeze |
6886 |
| Kuria |
Linked Horizon |
Guren no Yumiya (TV Size) |
6880 |
| Doormat |
ClariS |
Hitorigoto -TV MIX- |
6606 |
| Awaken |
Konuko |
Toumei Elegy |
6534 |
| Voltaeyx |
TheFatRat |
Mayday (feat. Laura Brehm) |
6335 |
| Takuya |
S3RL |
Pika Girl |
5510 |
| Bearizm |
Station Earth |
Cold Green Eyes ft. Roos Denayer |
5425 |
| Saten-san |
Yousei Teikoku |
Kokou no Sousei |
5300 |
| gowww |
Hatsune Miku & Megpoid Gumi |
MATRYOSHKA |
4878 |
| h3k1ru |
Yiruma & Skullee |
River Flows In You (A Love Note) |
4849 |
| ouranhshc |
Masayoshi Minoshima feat. nomico |
Bad Apple!! |
4829 |
| -kevincela- |
Rameses B |
Flaklypa |
4829 |
| ktgster |
Chasers |
Lost |
4770 |
| Sekai-nyan |
Suzuki Konomi |
This game (TV Size) |
4618 |
| Kuria |
ONE OK ROCK |
Answer is Near |
4591 |
| eLy |
Feint |
Tower Of Heaven (You Are Slaves) |
4461 |
| Secretpipe |
S3RL |
Bass Slut (Original Mix) |
4399 |
| Monstrata |
RADWIMPS |
Zen Zen Zense (movie ver.) |
4352 |
| Natsu |
Hanatan |
Airman ga Taosenai (SOUND HOLIC Ver.) |
4328 |
| NatsumeRin |
Hatsune Miku |
Senbonzakura (Short Ver.) |
3966 |
| kristi71111 |
TK from Ling tosite sigure |
unravel (TV edit) |
3942 |
| Rue |
DJ Genericname |
Dear You |
3886 |
| Monstrata |
Porter Robinson & Madeon |
Shelter |
3860 |
| Asphyxia |
xi |
Blue Zenith |
3849 |
| Kagetsu |
KANA-BOON |
Silhouette |
3839 |
| osuplayer111 |
Getter Jaani |
Rockefeller Street (Nightcore Mix) |
3703 |
| Smoothie |
UNDEAD CORPORATION |
Yoru Naku Usagi wa Yume o Miru |
3677 |
| Star Stream |
Wotamin |
Gigantic O.T.N |
3627 |
| Kyshiro |
toby fox |
MEGALOVANIA |
3613 |
| Blue Dragon |
The Quick Brown Fox |
The Big Black |
3403 |
| RLC |
Himeringo |
Yotsuya-san ni Yoroshiku |
3377 |
| Multiple Creators |
Soleily |
Renatus |
3345 |
| Sherry |
Nanahira |
Frightfully-insane Flan-chan’s frightful song |
3327 |
| jonathanlfj |
Reol |
Plus Danshi ver Reol |
3324 |
| Len |
Shawn Wasabi |
Marble Soda |
3288 |
| Ephemeral |
Masayoshi Minoshima ft. nomico |
Bad Apple!! |
3219 |
| Tarrasky |
Agnete Kjolsrud |
Get Jinxed |
3165 |
| Guy |
Aoi Eir |
IGNITE (TV size ver.) |
3119 |
| AllStar12 |
yuikonnu & ayaponzu* |
Super Nuko World |
3033 |
| Garven |
Saiya |
Remote Control |
3028 |
| handsome |
Reol |
MONSTER |
3009 |
| Gaia |
Reol |
Asymmetry |
2993 |
| rui |
Hatsune Miku |
Rubik’s Cube |
2967 |
| Monstrata |
9mm Parabellum Bullet |
Inferno |
2959 |
| Jacob |
NOMA |
Brain Power |
2908 |
# Most played maps: the 50 rows of beatmaps with the highest playcount
beatmaps %>%
  arrange(desc(playcount)) %>%
  head(50) %>%
  .[c("creator", "artist", "title", "version", "playcount")] %>%
  kable()
| W h i t e |
Kuba Oms |
My Love |
Hard |
21429513 |
| W h i t e |
Kuba Oms |
My Love |
Normal |
20405157 |
| jonathanlfj |
cYsmix feat. Emmy |
Tear Rain |
Normal |
18188427 |
| Blue Dragon |
The Quick Brown Fox |
The Big Black |
WHO’S AFRAID OF THE BIG BLACK |
14209863 |
| jonathanlfj |
cYsmix feat. Emmy |
Tear Rain |
Hard |
12840626 |
| ktgster |
Chasers |
Lost |
Normal |
12456963 |
| -kevincela- |
Rameses B |
Flaklypa |
Normal |
12124613 |
| Charles445 |
Rostik |
Liquid (Paul Rosenthal Remix) |
Easy |
10881187 |
| Blue Dragon |
Team Nekokan |
Can’t Defeat Airman |
Holy Shit! It’s Airman!! |
10878775 |
| -kevincela- |
Rameses B |
Flaklypa |
Hard |
10611576 |
| W h i t e |
Kuba Oms |
My Love |
Insane |
9287944 |
| Multiple Creators |
Soleily |
Renatus |
Normal |
8065171 |
| jonathanlfj |
cYsmix feat. Emmy |
Tear Rain |
Insane |
8028482 |
| ktgster |
Chasers |
Lost |
Hard |
7715600 |
| Rue |
DJ Genericname |
Dear You |
Dear Rue |
7548246 |
| Bearizm |
Station Earth |
Cold Green Eyes ft. Roos Denayer |
Divine |
7343941 |
| h3k1ru |
Yiruma & Skullee |
River Flows In You (A Love Note) |
Love Note |
7328010 |
| VINXIS |
Reol |
No title |
Light Insane |
6820213 |
| Charles445 |
Rostik |
Liquid (Paul Rosenthal Remix) |
Normal |
6818829 |
| val0108 |
Lily |
Scarlet Rose |
0108 style |
6684374 |
| Charles445 |
Rostik |
Liquid (Paul Rosenthal Remix) |
Hard |
6659517 |
| Ekoro |
UNDEAD CORPORATION |
Everything will freeze |
Insane |
6604128 |
| Reikin |
Nico Nico Douga |
U.N. Owen Was Her? |
Normal |
6495851 |
| Fort |
Panda Eyes & Teminite |
Highscore |
Another |
6442871 |
| Fort |
Panda Eyes & Teminite |
Highscore |
LGV’s Insane |
6255646 |
| Lust |
Tsunamaru |
Daidai Genome |
Insane |
6182514 |
| Takuya |
S3RL |
Pika Girl |
Hard |
5910259 |
| Saten-san |
Yousei Teikoku |
Kokou no Sousei |
Hard |
5760621 |
| Nakagawa-Kanon |
xi |
FREEDOM DiVE |
Another |
5520280 |
| Kuria |
Linked Horizon |
Guren no Yumiya (TV Size) |
DS’s Hard |
5517593 |
| Flask |
Fujijo Seitokai Shikkou-bu |
Best FriendS -TV Size- |
Fycho’s Insane |
5498716 |
| VINXIS |
Reol |
No title |
byfaR’s Hard |
5419243 |
| Garven |
Saiya |
Remote Control |
Insane |
5395495 |
| xxdeathx |
FLOWxGRANRODEO |
7 -seven- -TV SIZE - |
Expert |
5357990 |
| Damnae |
raja |
the light |
Normal |
5289914 |
| Kuria |
Linked Horizon |
Guren no Yumiya (TV Size) |
alacat’s Normal |
5244344 |
| Fort |
Panda Eyes & Teminite |
Highscore |
Hyper |
5227173 |
| Doormat |
ClariS |
Hitorigoto -TV MIX- |
Insane |
5173748 |
| Bearizm |
Station Earth |
Cold Green Eyes ft. Roos Denayer |
apple’s Insane |
5172218 |
| galvenize |
DJ Fresh |
Gold Dust |
Insane |
5150833 |
| Taeyang |
kradness&Reol |
Remote Control |
Max Control! |
5141365 |
| Luerxa |
Primastella |
Koigokoro |
Delis’ Insane |
5138932 |
| JauiPlaY |
DJ Okawari |
Flower Dance |
Flower |
5138709 |
| Multiple Creators |
Soleily |
Renatus |
Hard |
5085398 |
| val0108 |
Hatsune Miku |
Mythologia’s End |
Myth0108ia |
5076064 |
| Takuya |
S3RL |
Pika Girl |
Insane |
5024386 |
| Natsu |
Hanatan |
Airman ga Taosenai (SOUND HOLIC Ver.) |
Insane |
4916733 |
| Star Stream |
Sagara Kokoro |
Hoshizora no Ima |
S.S |
4879941 |
| Nakagawa-Kanon |
xi |
FREEDOM DiVE |
FOUR DIMENSIONS |
4753427 |
| Kuria |
Linked Horizon |
Guren no Yumiya (TV Size) |
Insane |
4679371 |
Scatterplots
# Approach rate against BPM (standard mode), x limited to 0-500
ggplot(std, aes(x = bpm, y = diff_approach)) +
  geom_point(alpha = 0.1) +
  scale_x_continuous(limits = c(0, 500)) +
  AR_y_scale +
  labs(title = "Approach Rate vs BPM")

# Star rating against total length (standard mode)
ggplot(std, aes(x = total_length, y = difficultyrating)) +
  geom_point(alpha = 0.1) +
  length_x_scale +
  SR_y_scale +
  labs(title = "Star Rating vs Total Length")

# Max combo against drain time (standard mode), y limited to 0-4000
ggplot(std, aes(x = hit_length, y = max_combo)) +
  geom_point(alpha = 0.05) +
  length_x_scale +
  scale_y_continuous(limits = c(0, 4000)) +
  labs(title = "Max Combo vs Drain Time")

# Linear fit of max combo on drain time for standard-mode maps; a high linear
# correlation is expected here
combo.fit <- lm(max_combo ~ hit_length, data = std)
summary(combo.fit)
##
## Call:
## lm(formula = max_combo ~ hit_length, data = std)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3860.7 -134.3 -17.4 120.5 23020.5
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -99.20894 2.51272 -39.48 <2e-16 ***
## hit_length 4.89613 0.01715 285.53 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 272.8 on 56923 degrees of freedom
## (4 observations deleted due to missingness)
## Multiple R-squared: 0.5888, Adjusted R-squared: 0.5888
## F-statistic: 8.152e+04 on 1 and 56923 DF, p-value: < 2.2e-16
# Favorite count against playcount (standard mode); axes limited to
# 0-1,000,000 plays and 0-1000 favorites
ggplot(std, aes(x = playcount, y = favourite_count)) +
  geom_point(alpha = 0.05) +
  scale_x_continuous(limits = c(0, 1000000)) +
  scale_y_continuous(limits = c(0, 1000)) +
  labs(title = "Favorite Count vs Playcount")

# Playcount against total length (standard mode)
ggplot(std, aes(x = total_length, y = playcount)) +
  geom_point(alpha = 0.1) +
  length_x_scale +
  scale_y_continuous(limits = c(0, 1000000)) +
  labs(title = "Playcount vs Total Length")

# Approach rate against approval date (standard mode)
ggplot(std, aes(x = approved_date, y = diff_approach)) +
  geom_point(alpha = 0.05) +
  approved_x_scale +
  AR_y_scale +
  labs(title = "Approach Rate vs Date Approved")

# Star rating against approval date (standard mode)
ggplot(std, aes(x = approved_date, y = difficultyrating)) +
  geom_point(alpha = 0.1) +
  approved_x_scale +
  SR_y_scale +
  labs(title = "Star Rating vs Date Approved")

Spread info
# Playcount by song time, categorized by spread icon
# https://osu.ppy.sh/help/wiki/Difficulties#star-rating Not sure about values between boundaries
# spread.sr holds the lower star-rating bound of each spread tier, in the same
# order as spread.names and spread.colors.
spread.sr <- c(0, 1.51, 2.26, 3.76, 5.26, 6.76)
spread.names <- c("Easy", "Normal", "Hard", "Insane", "Expert", "Expert+")
spread.colors <- c("olivedrab3", "paleturquoise", "gold", "hotpink", "purple", "darkgray")
# Assign difficulty rating by spread ranges to spread names.
# cut() produces one interval fewer than the number of breaks, so Inf is
# appended as the upper bound of the last tier. With only the six lower bounds
# there are five intervals, and every map rated 6.76 or higher would get NA
# instead of "Expert+".
beatmaps$spread_name <- spread.names[
  cut(beatmaps$difficultyrating, c(spread.sr, Inf), right = FALSE, labels = FALSE)
]
std <- beatmaps[beatmaps$mode == "Standard", ] # update std
hitlength.bins <- seq(0, 360, 30)
# 2 x 3 grid: one panel per spread tier
par(mfrow = c(2, 3), mar = c(4, 4, 4, 1), cex.main = 2)
for (i in seq_along(spread.names)) {
  # which() skips rows whose spread_name is NA instead of turning them into
  # all-NA rows
  std.spread <- std[which(std$spread_name == spread.names[i]), ]
  # Total playcount per 30-second hit-length bin. Summing as doubles avoids
  # the NA that an integer sum() returns once a total exceeds
  # .Machine$integer.max (the accompanying warning is hidden by the chunk
  # options).
  playcount.bin.sum <- vapply(
    split(std.spread, cut(std.spread$hit_length, hitlength.bins)),
    function(df) sum(as.numeric(df$playcount)),
    numeric(1)
  )
  barplot(playcount.bin.sum, space = 0, width = 30, xlab = "Hit length (s)",
          ylab = "Playcount Total", main = spread.names[i],
          col = spread.colors[i], axisnames = FALSE)
  # Bars are 30 wide with no gaps, so the bin edges double as tick positions
  axis(1, at = hitlength.bins)
}

# Same but 150+ hitlength and stacked bars
hitlength.bins.150 <- seq(150, 360, 30)
# One row per spread tier, one column per hit-length bin; each column is named
# after the lower edge of its bin
playcount.bin.mat <- matrix(
  ncol = length(hitlength.bins.150) - 1,
  nrow = length(spread.names)
)
colnames(playcount.bin.mat) <- head(hitlength.bins.150, -1)
rownames(playcount.bin.mat) <- spread.names
for (i in seq_len(nrow(playcount.bin.mat))) {
  # which() skips rows whose spread_name is NA instead of turning them into
  # all-NA rows
  std.spread <- std[which(std$spread_name == spread.names[i]), ]
  # Summing as doubles avoids the NA that an integer sum() returns once a
  # total exceeds .Machine$integer.max
  playcount.bin.mat[i, ] <- vapply(
    split(std.spread, cut(std.spread$hit_length, hitlength.bins.150)),
    function(df) sum(as.numeric(df$playcount)),
    numeric(1)
  )
}
dev.off() # Reset par
## null device
## 1
# Stacked bars of total playcount: one bar per hit-length bin, with one
# segment per difficulty tier (the rows of playcount.bin.mat)
barplot(
  playcount.bin.mat,
  main = "Total Playcount by Hitlength and Difficulty",
  xlab = "Hitlength (s)",
  ylab = "Total Playcount",
  col = spread.colors,
  legend.text = spread.names,
  width = 30,
  space = 0,
  axisnames = FALSE
)
# The bars start at x = 0, so shift the bin edges down by the first edge to
# get tick positions while labelling them with the real hit lengths
axis(
  side = 1,
  at = hitlength.bins.150 - min(hitlength.bins.150),
  labels = hitlength.bins.150
)